{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Predict Images\n",
    "Given a model and a set of images, the model is applied to each image and the predicted class is added to the metadata."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip3 install tensorflow==2.4.0 wget==3.2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import wget\n",
    "wget.download(\n",
    "    'https://raw.githubusercontent.com/IBM/claimed/master/component-library/claimed_utils.py'\n",
    ")\n",
    "from claimed_utils import parse_args_to_parameters\n",
    "import tensorflow as tf\n",
    "from tensorflow import keras\n",
    "from claimed_utils import unzip\n",
    "import os\n",
    "import os.path\n",
    "import glob\n",
    "import pandas as pd\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Column name containing the target value (the real class label, which is also the name of the image's sub-folder under data/)\n",
    "target_column = os.environ.get('target_column', 'target')\n",
    "\n",
    "# Column name containing the image file name\n",
    "image_column = os.environ.get('image_column', 'filename')\n",
    "\n",
    "# Target image size the model expects, given as comma-separated integers (e.g. '400,400')\n",
    "image_shape = os.environ.get('image_shape', '400,400')\n",
    "\n",
    "# Column name under which the prediction has to be stored\n",
    "prediction_column = os.environ.get('prediction_column', 'prediction')\n",
    "\n",
    "# data CSV file containing meta information about the images (e.g. real class, ...)\n",
    "metadata = os.environ.get('metadata', 'metadata.csv')\n",
    "\n",
    "# output CSV file receiving the meta information about the images with \"prediction_column\" added (the default overwrites the input file)\n",
    "output_metadata = os.environ.get('output_metadata', 'metadata.csv')\n",
    "\n",
    "# model zip file name - currently only zipped TensorFlow 2.x pb files supported\n",
    "model_zip = os.environ.get('model_zip', 'model.zip')\n",
    "\n",
    "# zip file name containing all the images\n",
    "data_zip = os.environ.get('data_zip', 'data.zip')\n",
    "\n",
    "parse_args_to_parameters()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Unpack the model archive and the image archive\n",
    "# ('.' is presumably the extraction target - see claimed_utils.unzip).\n",
    "for archive in (model_zip, data_zip):\n",
    "    unzip('.', archive)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Assumes model_zip unpacked into a ./model directory - TODO confirm archive layout.\n",
    "model_dir = 'model'\n",
    "model = keras.models.load_model(model_dir)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Class labels are the names of the sub-directories of data/.\n",
    "# They are sorted because glob returns entries in arbitrary order, while the\n",
    "# position in this list is used to translate the model's argmax index into a label.\n",
    "# NOTE(review): assumes the model was trained with class indices assigned in\n",
    "# alphanumeric folder order (the Keras directory-loader convention) - confirm.\n",
    "classes = sorted(\n",
    "    os.path.basename(path)\n",
    "    for path in glob.glob('data/*')\n",
    "    if os.path.isdir(path)  # plain files directly under data/ are not classes\n",
    ")\n",
    "classes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read the image metadata table from the CSV named by the `metadata` parameter.\n",
    "df = pd.read_csv(filepath_or_buffer=metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview only the first rows instead of rendering the whole table.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Parse the comma-separated size parameter once, up front, instead of exec-ing\n",
    "# the raw string for every row; int() rejects anything that is not a plain integer.\n",
    "target_size = tuple(int(dim) for dim in image_shape.split(','))\n",
    "\n",
    "# One predicted class name per metadata row, in row order.\n",
    "dataset = []\n",
    "\n",
    "for label, file_name in zip(df[target_column], df[image_column]):\n",
    "    # Images live at data/<real class>/<file name>; str() keeps numeric labels working.\n",
    "    image = keras.preprocessing.image.load_img(\n",
    "        os.path.join('data', str(label), str(file_name)),\n",
    "        target_size=target_size\n",
    "    )\n",
    "    input_arr = keras.preprocessing.image.img_to_array(image)\n",
    "    input_arr = np.array([input_arr])  # Convert single image to a batch.\n",
    "    predictions = model.predict(input_arr)\n",
    "    # The index of the highest score selects the predicted class name.\n",
    "    dataset.append(classes[np.argmax(predictions[0])])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Collect the per-image predictions into a NumPy array.\n",
    "dataset = np.asarray(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Re-read the metadata CSV so the predictions are attached to a freshly loaded table.\n",
    "df = pd.read_csv(filepath_or_buffer=metadata)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Store the predictions under the configured column name (replaces the column if it already exists).\n",
    "df = df.assign(**{prediction_column: dataset.tolist()})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Preview the first rows, now including the prediction column.\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the table, without the index column, to the output CSV.\n",
    "df.to_csv(path_or_buf=output_metadata, index=False)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
